# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the plotting/data libraries — confirm that is intended.
from warnings import filterwarnings
filterwarnings('ignore')
import os
# Make the dataset folder the working directory so the CSV files below can be opened by bare file name.
os.chdir('E:/Datasets/')
import pandas as pd
# Load the startups dataset and preview its first rows.
df=pd.read_csv('50_Startups (1).csv')
df.head()
| RND | ADMIN | MKT | STATE | PROFIT | |
|---|---|---|---|---|---|
| 0 | 165349.20 | 136897.80 | 471784.10 | New York | 192261.83 |
| 1 | 162597.70 | 151377.59 | 443898.53 | California | 191792.06 |
| 2 | 153441.51 | 101145.55 | 407934.54 | Florida | 191050.39 |
| 3 | 144372.41 | 118671.85 | 383199.62 | New York | 182901.99 |
| 4 | 142107.34 | 91391.77 | 366168.42 | Florida | 166187.94 |
# Print the frame summary: index range, column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50 entries, 0 to 49 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 RND 50 non-null float64 1 ADMIN 50 non-null float64 2 MKT 50 non-null float64 3 STATE 50 non-null object 4 PROFIT 50 non-null float64 dtypes: float64(4), object(1) memory usage: 2.1+ KB
# (rows, columns) of the loaded frame.
df.shape
(50, 5)
# Check for missing values: count the NaN cells in each column.
df.isna().sum()
RND 0 ADMIN 0 MKT 0 STATE 0 PROFIT 0 dtype: int64
import matplotlib.pyplot as plt
import seaborn as sns
# RND against PROFIT, drawn through matplotlib's explicit Axes interface.
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(df['RND'], df['PROFIT'])
ax.set_xlabel('RND')
ax.set_ylabel('PROFIT')
ax.set_title('RND VS PROFIT')
ax.grid()
plt.show()
# The same RND/PROFIT relationship, this time plotted with seaborn.
fig, ax = plt.subplots(figsize=(16, 8))
sns.scatterplot(x='RND', y='PROFIT', data=df, ax=ax)
ax.set_title('RND VS PROFIT SEABORN')
ax.grid()
plt.show()
# Column labels of the frame.
df.columns
Index(['RND', 'ADMIN', 'MKT', 'STATE', 'PROFIT'], dtype='object')
# Data type of each column.
df.dtypes
RND float64 ADMIN float64 MKT float64 STATE object PROFIT float64 dtype: object
# Rows whose PROFIT is strictly above 150000, selected through a named boolean mask.
is_high_profit = df['PROFIT'] > 150000
df[is_high_profit]
| RND | ADMIN | MKT | STATE | PROFIT | |
|---|---|---|---|---|---|
| 0 | 165349.20 | 136897.80 | 471784.10 | New York | 192261.83 |
| 1 | 162597.70 | 151377.59 | 443898.53 | California | 191792.06 |
| 2 | 153441.51 | 101145.55 | 407934.54 | Florida | 191050.39 |
| 3 | 144372.41 | 118671.85 | 383199.62 | New York | 182901.99 |
| 4 | 142107.34 | 91391.77 | 366168.42 | Florida | 166187.94 |
| 5 | 131876.90 | 99814.71 | 362861.36 | New York | 156991.12 |
| 6 | 134615.46 | 147198.87 | 127716.82 | California | 156122.51 |
| 7 | 130298.13 | 145530.06 | 323876.68 | Florida | 155752.60 |
| 8 | 120542.52 | 148718.95 | 311613.29 | New York | 152211.77 |
# Labels of the object-dtype (categorical) columns, kept as a pandas Index.
is_object = df.dtypes == 'object'
cat = df.columns[is_object]
cat
Index(['STATE'], dtype='object')
# To get a plain Python list instead of the Index shown above, convert the labels.
cat = df.dtypes[df.dtypes == 'object'].index.tolist()
cat
['STATE']
# Numeric columns may be int, float, etc., so select "everything that is not object" instead.
con = [name for name, kind in df.dtypes.items() if kind != 'object']
con
['RND', 'ADMIN', 'MKT', 'PROFIT']
# Number of rows per distinct STATE value.
df['STATE'].value_counts()
New York 17 California 17 Florida 16 Name: STATE, dtype: int64
# Frequency of each state, drawn first as a bar chart and then as a pie chart.
state_counts = df['STATE'].value_counts()
state_counts.plot.bar(title='Countplot for State', figsize=(16, 8))
plt.show()
state_counts.plot.pie(title='Countplot for State Pie', figsize=(16, 8))
plt.show()
cat
['STATE']
# One bar chart of value counts per categorical column.
# plt.show() stays inside the loop so that every column is rendered as its own figure.
for i in cat:
    df[i].value_counts().plot(kind='bar', title=f'Countplot for {i}', figsize=(16, 8))
    plt.show()
con
['RND', 'ADMIN', 'MKT', 'PROFIT']
# Distribution of ADMIN with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(16, 8))
sns.histplot(x='ADMIN', data=df, kde=True, ax=ax)
ax.set_title('Histogram for Admin')
ax.grid()
plt.show()
con
['RND', 'ADMIN', 'MKT', 'PROFIT']
# Histogram with a KDE overlay for every continuous column, one figure per column.
for i in con:
    plt.figure(figsize=(16, 8))
    sns.histplot(data=df, x=i, kde=True)
    plt.title(f'Histogram for {i}')
    # Deliberately no plt.grid(): grid lines make a histogram harder to read (keep them for scatter plots).
    plt.show()
import pandas as pd
# Switch to the Cars93 dataset; `df` is rebound here, replacing the startups frame.
# The bare file name is resolved against the working directory set by os.chdir() earlier in the file.
df=pd.read_csv('Cars93 (1).csv')
df.head()
| id | Manufacturer | Model | Type | Min.Price | Price | Max.Price | MPG.city | MPG.highway | AirBags | ... | Passengers | Length | Wheelbase | Width | Turn.circle | Rear.seat.room | Luggage.room | Weight | Origin | Make | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Acura | Integra | Small | 12.9 | 15.9 | 18.8 | 25 | 31 | None | ... | 5 | 177 | 102 | 68 | 37 | 26.5 | 11.0 | 2705 | non-USA | Acura Integra |
| 1 | 2 | Acura | Legend | Midsize | 29.2 | 33.9 | 38.7 | 18 | 25 | Driver & Passenger | ... | 5 | 195 | 115 | 71 | 38 | 30.0 | 15.0 | 3560 | non-USA | Acura Legend |
| 2 | 3 | Audi | 90 | Compact | 25.9 | 29.1 | 32.3 | 20 | 26 | Driver only | ... | 5 | 180 | 102 | 67 | 37 | 28.0 | 14.0 | 3375 | non-USA | Audi 90 |
| 3 | 4 | Audi | 100 | Midsize | 30.8 | 37.7 | 44.6 | 19 | 26 | NaN | ... | 6 | 193 | 106 | 70 | 37 | 31.0 | 17.0 | 3405 | non-USA | Audi 100 |
| 4 | 5 | BMW | 535i | Midsize | 23.7 | 30.0 | 36.2 | 22 | 30 | Driver only | ... | 4 | 186 | 109 | 69 | 39 | 27.0 | 13.0 | 3640 | non-USA | BMW 535i |
5 rows × 28 columns
# Print the frame summary: column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 93 entries, 0 to 92 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 93 non-null int64 1 Manufacturer 93 non-null object 2 Model 93 non-null object 3 Type 93 non-null object 4 Min.Price 93 non-null float64 5 Price 93 non-null float64 6 Max.Price 93 non-null float64 7 MPG.city 93 non-null int64 8 MPG.highway 93 non-null int64 9 AirBags 89 non-null object 10 DriveTrain 93 non-null object 11 Cylinders 93 non-null object 12 EngineSize 93 non-null float64 13 Horsepower 93 non-null int64 14 RPM 93 non-null int64 15 Rev.per.mile 93 non-null int64 16 Man.trans.avail 93 non-null object 17 Fuel.tank.capacity 93 non-null float64 18 Passengers 93 non-null int64 19 Length 93 non-null int64 20 Wheelbase 93 non-null int64 21 Width 93 non-null int64 22 Turn.circle 93 non-null int64 23 Rear.seat.room 91 non-null float64 24 Luggage.room 82 non-null float64 25 Weight 93 non-null int64 26 Origin 93 non-null object 27 Make 93 non-null object dtypes: float64(7), int64(12), object(9) memory usage: 20.5+ KB
# Per-column count of missing cells, kept in `s` so it can be filtered afterwards.
missing_mask = df.isna()
s = missing_mask.sum()
s
id 0 Manufacturer 0 Model 0 Type 0 Min.Price 0 Price 0 Max.Price 0 MPG.city 0 MPG.highway 0 AirBags 4 DriveTrain 0 Cylinders 0 EngineSize 0 Horsepower 0 RPM 0 Rev.per.mile 0 Man.trans.avail 0 Fuel.tank.capacity 0 Passengers 0 Length 0 Wheelbase 0 Width 0 Turn.circle 0 Rear.seat.room 2 Luggage.room 11 Weight 0 Origin 0 Make 0 dtype: int64
# Keep only the columns that actually contain missing values.
s[s.gt(0)]
AirBags 4 Rear.seat.room 2 Luggage.room 11 dtype: int64
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()
0
# Drop the `id` column (it looks like a plain row counter in the preview) and re-check the head.
df = df.drop('id', axis=1)
df.head()
| Manufacturer | Model | Type | Min.Price | Price | Max.Price | MPG.city | MPG.highway | AirBags | DriveTrain | ... | Passengers | Length | Wheelbase | Width | Turn.circle | Rear.seat.room | Luggage.room | Weight | Origin | Make | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Acura | Integra | Small | 12.9 | 15.9 | 18.8 | 25 | 31 | None | Front | ... | 5 | 177 | 102 | 68 | 37 | 26.5 | 11.0 | 2705 | non-USA | Acura Integra |
| 1 | Acura | Legend | Midsize | 29.2 | 33.9 | 38.7 | 18 | 25 | Driver & Passenger | Front | ... | 5 | 195 | 115 | 71 | 38 | 30.0 | 15.0 | 3560 | non-USA | Acura Legend |
| 2 | Audi | 90 | Compact | 25.9 | 29.1 | 32.3 | 20 | 26 | Driver only | Front | ... | 5 | 180 | 102 | 67 | 37 | 28.0 | 14.0 | 3375 | non-USA | Audi 90 |
| 3 | Audi | 100 | Midsize | 30.8 | 37.7 | 44.6 | 19 | 26 | NaN | Front | ... | 6 | 193 | 106 | 70 | 37 | 31.0 | 17.0 | 3405 | non-USA | Audi 100 |
| 4 | BMW | 535i | Midsize | 23.7 | 30.0 | 36.2 | 22 | 30 | Driver only | Rear | ... | 4 | 186 | 109 | 69 | 39 | 27.0 | 13.0 | 3640 | non-USA | BMW 535i |
5 rows × 27 columns
# Column labels as a plain Python list.
df.columns.tolist()
['Manufacturer', 'Model', 'Type', 'Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'AirBags', 'DriveTrain', 'Cylinders', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Man.trans.avail', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight', 'Origin', 'Make']
# Data type of each column.
df.dtypes
Manufacturer object Model object Type object Min.Price float64 Price float64 Max.Price float64 MPG.city int64 MPG.highway int64 AirBags object DriveTrain object Cylinders object EngineSize float64 Horsepower int64 RPM int64 Rev.per.mile int64 Man.trans.avail object Fuel.tank.capacity float64 Passengers int64 Length int64 Wheelbase int64 Width int64 Turn.circle int64 Rear.seat.room float64 Luggage.room float64 Weight int64 Origin object Make object dtype: object
# Split the column labels by dtype: object columns are categorical, everything else is continuous.
col_types = df.dtypes
cat = col_types[col_types == object].index.tolist()
con = col_types[col_types != object].index.tolist()
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# Echo the continuous (non-object) column names collected above.
con
['Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
# Summary of the categorical columns (count / unique / top / freq), one row per column.
df[cat].describe().transpose()
| count | unique | top | freq | |
|---|---|---|---|---|
| Manufacturer | 93 | 32 | Chevrolet | 8 |
| Model | 93 | 93 | Integra | 1 |
| Type | 93 | 6 | Midsize | 22 |
| AirBags | 89 | 3 | Driver only | 42 |
| DriveTrain | 93 | 3 | Front | 67 |
| Cylinders | 93 | 6 | 4 | 49 |
| Man.trans.avail | 93 | 2 | Yes | 61 |
| Origin | 93 | 2 | USA | 48 |
| Make | 93 | 93 | Acura Integra | 1 |
# Number of rows per distinct Cylinders value.
df['Cylinders'].value_counts()
4 49 6 31 8 7 3 3 5 2 rotary 1 Name: Cylinders, dtype: int64
# Number of rows per distinct Origin value.
df['Origin'].value_counts()
USA 48 non-USA 45 Name: Origin, dtype: int64
# Descriptive statistics of the continuous columns, one row per column.
continuous_summary = df[con].describe()
continuous_summary.transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Min.Price | 93.0 | 17.125806 | 8.746029 | 6.7 | 10.8 | 14.7 | 20.3 | 45.4 |
| Price | 93.0 | 19.509677 | 9.659430 | 7.4 | 12.2 | 17.7 | 23.3 | 61.9 |
| Max.Price | 93.0 | 21.898925 | 11.030457 | 7.9 | 14.7 | 19.6 | 25.3 | 80.0 |
| MPG.city | 93.0 | 22.365591 | 5.619812 | 15.0 | 18.0 | 21.0 | 25.0 | 46.0 |
| MPG.highway | 93.0 | 29.086022 | 5.331726 | 20.0 | 26.0 | 28.0 | 31.0 | 50.0 |
| EngineSize | 93.0 | 2.667742 | 1.037363 | 1.0 | 1.8 | 2.4 | 3.3 | 5.7 |
| Horsepower | 93.0 | 143.827957 | 52.374410 | 55.0 | 103.0 | 140.0 | 170.0 | 300.0 |
| RPM | 93.0 | 5280.645161 | 596.731690 | 3800.0 | 4800.0 | 5200.0 | 5750.0 | 6500.0 |
| Rev.per.mile | 93.0 | 2332.204301 | 496.506525 | 1320.0 | 1985.0 | 2340.0 | 2565.0 | 3755.0 |
| Fuel.tank.capacity | 93.0 | 16.664516 | 3.279370 | 9.2 | 14.5 | 16.4 | 18.8 | 27.0 |
| Passengers | 93.0 | 5.086022 | 1.038979 | 2.0 | 4.0 | 5.0 | 6.0 | 8.0 |
| Length | 93.0 | 183.204301 | 14.602382 | 141.0 | 174.0 | 183.0 | 192.0 | 219.0 |
| Wheelbase | 93.0 | 103.946237 | 6.819674 | 90.0 | 98.0 | 103.0 | 110.0 | 119.0 |
| Width | 93.0 | 69.376344 | 3.778986 | 60.0 | 67.0 | 69.0 | 72.0 | 78.0 |
| Turn.circle | 93.0 | 38.956989 | 3.223265 | 32.0 | 37.0 | 39.0 | 41.0 | 45.0 |
| Rear.seat.room | 91.0 | 27.829670 | 2.989072 | 19.0 | 26.0 | 27.5 | 30.0 | 36.0 |
| Luggage.room | 82.0 | 13.890244 | 2.997967 | 6.0 | 12.0 | 14.0 | 15.0 | 22.0 |
| Weight | 93.0 | 3072.903226 | 589.896510 | 1695.0 | 2620.0 | 3040.0 | 3525.0 | 4105.0 |
import matplotlib.pyplot as plt
import seaborn as sns
# Echo the categorical column names that the next loop iterates over.
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# One bar chart of value counts per categorical column.
# plt.show() stays inside the loop so that every column is rendered as its own figure.
for i in cat:
    df[i].value_counts().plot(kind='bar',
                              title=f'Countplot for {i}',
                              figsize=(16,8))
    plt.show()
con
['Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
# Histogram with a KDE overlay for every continuous column, one figure per column.
for i in con:
    plt.figure(figsize=(16,8))
    sns.histplot(data=df , x=i , kde=True)
    plt.title(f'Histogram for {i}')
    plt.show()
con
['Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
# Length against Price, plotted with seaborn on an explicit Axes.
fig, ax = plt.subplots(figsize=(16, 8))
sns.scatterplot(x='Length', y='Price', data=df, ax=ax)
ax.set_title('Scatterplot for Length VS Price')
ax.grid()
plt.show()
# The same Length/Price scatter using matplotlib only.
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(df['Length'], df['Price'])
ax.set_xlabel('Length')
ax.set_ylabel('Price')
ax.set_title('Length VS Price')
ax.grid()
plt.show()
# Pairwise correlation matrix of the continuous columns, kept in `corr` for the heatmaps.
continuous_part = df[con]
corr = continuous_part.corr()
corr
| Min.Price | Price | Max.Price | MPG.city | MPG.highway | EngineSize | Horsepower | RPM | Rev.per.mile | Fuel.tank.capacity | Passengers | Length | Wheelbase | Width | Turn.circle | Rear.seat.room | Luggage.room | Weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Min.Price | 1.000000 | 0.970601 | 0.906756 | -0.622875 | -0.579966 | 0.645488 | 0.802444 | -0.042598 | -0.470395 | 0.635369 | 0.061236 | 0.553859 | 0.516758 | 0.492878 | 0.428603 | 0.376642 | 0.413485 | 0.666554 |
| Price | 0.970601 | 1.000000 | 0.981580 | -0.594562 | -0.560680 | 0.597425 | 0.788218 | -0.004955 | -0.426395 | 0.619480 | 0.057860 | 0.503628 | 0.500864 | 0.456028 | 0.392590 | 0.311499 | 0.366569 | 0.647179 |
| Max.Price | 0.906756 | 0.981580 | 1.000000 | -0.547811 | -0.522561 | 0.535012 | 0.744445 | 0.025015 | -0.374024 | 0.581294 | 0.053216 | 0.442933 | 0.467501 | 0.408414 | 0.347785 | 0.247260 | 0.315315 | 0.605142 |
| MPG.city | -0.622875 | -0.594562 | -0.547811 | 1.000000 | 0.943936 | -0.710003 | -0.672636 | 0.363045 | 0.695857 | -0.813144 | -0.416856 | -0.666239 | -0.667108 | -0.720534 | -0.666389 | -0.384347 | -0.494894 | -0.843139 |
| MPG.highway | -0.579966 | -0.560680 | -0.522561 | 0.943936 | 1.000000 | -0.626795 | -0.619044 | 0.313469 | 0.587497 | -0.786039 | -0.466386 | -0.542897 | -0.615384 | -0.640359 | -0.593683 | -0.366684 | -0.371629 | -0.810658 |
| EngineSize | 0.645488 | 0.597425 | 0.535012 | -0.710003 | -0.626795 | 1.000000 | 0.732120 | -0.547898 | -0.824009 | 0.759306 | 0.372721 | 0.780283 | 0.732484 | 0.867110 | 0.778464 | 0.502750 | 0.680827 | 0.845075 |
| Horsepower | 0.802444 | 0.788218 | 0.744445 | -0.672636 | -0.619044 | 0.732120 | 1.000000 | 0.036688 | -0.600314 | 0.711790 | 0.009264 | 0.550865 | 0.486854 | 0.644413 | 0.561216 | 0.256732 | 0.359217 | 0.738798 |
| RPM | -0.042598 | -0.004955 | 0.025015 | 0.363045 | 0.313469 | -0.547898 | 0.036688 | 1.000000 | 0.494764 | -0.333345 | -0.467138 | -0.441249 | -0.467812 | -0.539721 | -0.505651 | -0.342175 | -0.524845 | -0.427931 |
| Rev.per.mile | -0.470395 | -0.426395 | -0.374024 | 0.695857 | 0.587497 | -0.824009 | -0.600314 | 0.494764 | 1.000000 | -0.609710 | -0.334976 | -0.690233 | -0.636824 | -0.780460 | -0.733160 | -0.377010 | -0.592792 | -0.735264 |
| Fuel.tank.capacity | 0.635369 | 0.619480 | 0.581294 | -0.813144 | -0.786039 | 0.759306 | 0.711790 | -0.333345 | -0.609710 | 1.000000 | 0.472095 | 0.690461 | 0.757674 | 0.798719 | 0.671343 | 0.509689 | 0.613437 | 0.894018 |
| Passengers | 0.061236 | 0.057860 | 0.053216 | -0.416856 | -0.466386 | 0.372721 | 0.009264 | -0.467138 | -0.334976 | 0.472095 | 1.000000 | 0.485294 | 0.694054 | 0.489979 | 0.449025 | 0.694134 | 0.653317 | 0.553273 |
| Length | 0.553859 | 0.503628 | 0.442933 | -0.666239 | -0.542897 | 0.780283 | 0.550865 | -0.441249 | -0.690233 | 0.690461 | 0.485294 | 1.000000 | 0.823650 | 0.822148 | 0.738955 | 0.549958 | 0.712962 | 0.806274 |
| Wheelbase | 0.516758 | 0.500864 | 0.467501 | -0.667108 | -0.615384 | 0.732484 | 0.486854 | -0.467812 | -0.636824 | 0.757674 | 0.694054 | 0.823650 | 1.000000 | 0.807213 | 0.723324 | 0.667259 | 0.734127 | 0.871895 |
| Width | 0.492878 | 0.456028 | 0.408414 | -0.720534 | -0.640359 | 0.867110 | 0.644413 | -0.539721 | -0.780460 | 0.798719 | 0.489979 | 0.822148 | 0.807213 | 1.000000 | 0.817854 | 0.465618 | 0.673490 | 0.874961 |
| Turn.circle | 0.428603 | 0.392590 | 0.347785 | -0.666389 | -0.593683 | 0.778464 | 0.561216 | -0.505651 | -0.733160 | 0.671343 | 0.449025 | 0.738955 | 0.723324 | 0.817854 | 1.000000 | 0.466328 | 0.585018 | 0.778043 |
| Rear.seat.room | 0.376642 | 0.311499 | 0.247260 | -0.384347 | -0.366684 | 0.502750 | 0.256732 | -0.342175 | -0.377010 | 0.509689 | 0.694134 | 0.549958 | 0.667259 | 0.465618 | 0.466328 | 1.000000 | 0.651968 | 0.526250 |
| Luggage.room | 0.413485 | 0.366569 | 0.315315 | -0.494894 | -0.371629 | 0.680827 | 0.359217 | -0.524845 | -0.592792 | 0.613437 | 0.653317 | 0.712962 | 0.734127 | 0.673490 | 0.585018 | 0.651968 | 1.000000 | 0.637226 |
| Weight | 0.666554 | 0.647179 | 0.605142 | -0.843139 | -0.810658 | 0.845075 | 0.738798 | -0.427931 | -0.735264 | 0.894018 | 0.553273 | 0.806274 | 0.871895 | 0.874961 | 0.778043 | 0.526250 | 0.637226 | 1.000000 |
# No data=df here: the correlation matrix itself is passed straight to heatmap().
fig, ax = plt.subplots(figsize=(25, 25))
# annot=True writes each cell's value on top of its color.
sns.heatmap(corr, annot=True, ax=ax)
plt.show()
# matplotlib.pyplot has no heatmap() function (calling it raises AttributeError),
# so the matrix is rendered with imshow() and the cell values are written by hand.
plt.figure(figsize=(25,25))
corr_values = corr.to_numpy()
plt.imshow(corr_values, aspect='auto')
plt.colorbar()
# Label both axes with the column names, since imshow() only knows integer positions.
plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
plt.yticks(range(len(corr.index)), corr.index)
for row in range(corr_values.shape[0]):
    for col in range(corr_values.shape[1]):
        # x is the column position, y is the row position.
        plt.text(col, row, f'{corr_values[row, col]:.2g}', ha='center', va='center')
plt.show()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[51], line 2 1 plt.figure(figsize=(25,25)) ----> 2 plt.heatmap(corr,annot=True) 3 plt.show() AttributeError: module 'matplotlib.pyplot' has no attribute 'heatmap'
<Figure size 2500x2500 with 0 Axes>
# Echo the categorical column names.
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# Echo the continuous column names.
con
['Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
# Price distribution within each car Type.
fig, ax = plt.subplots(figsize=(16, 8))
sns.boxplot(x='Type', y='Price', data=df, ax=ax)
ax.set_title('Type VS Price')
plt.show()
# Price distribution per AirBags category; Price is on the x axis, so the boxes are horizontal.
fig, ax = plt.subplots(figsize=(16, 8))
sns.boxplot(x='Price', y='AirBags', data=df, ax=ax)
ax.set_title('Boxplot for AirBags vs Price')
plt.show()
# Box plot of Price against every categorical column, one figure per column.
for i in cat:
    plt.figure(figsize=(16,8))
    sns.boxplot(data=df, x=i, y='Price')
    plt.title(f'Box plot for Price VS {i} ')
    plt.show()
# Box plot of MPG.city against every categorical column, one figure per column.
for i in cat:
    plt.figure(figsize=(16,8))
    sns.boxplot(data=df, x=i, y='MPG.city')
    plt.title(f'Boxplot for {i} vs MPG.city')
    plt.show()
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# Contingency table: car Type in the rows, Cylinders in the columns.
cr = pd.crosstab(index=df['Type'], columns=df['Cylinders'])
cr
| Cylinders | 3 | 4 | 5 | 6 | 8 | rotary |
|---|---|---|---|---|---|---|
| Type | ||||||
| Compact | 0 | 15 | 0 | 1 | 0 | 0 |
| Large | 0 | 0 | 0 | 7 | 4 | 0 |
| Midsize | 0 | 7 | 1 | 12 | 2 | 0 |
| Small | 3 | 18 | 0 | 0 | 0 | 0 |
| Sporty | 0 | 8 | 0 | 4 | 1 | 1 |
| Van | 0 | 1 | 1 | 7 | 0 | 0 |
# Heatmap of the Type x Cylinders counts; annot=True prints each count in its cell.
sns.heatmap(cr,annot=True)
<Axes: xlabel='Cylinders', ylabel='Type'>
# Echo the continuous column names.
con
['Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
# Echo the categorical column names.
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# Contingency table: Man.trans.avail in the rows, DriveTrain in the columns.
cr1 = pd.crosstab(index=df['Man.trans.avail'], columns=df['DriveTrain'])
cr1
| DriveTrain | 4WD | Front | Rear |
|---|---|---|---|
| Man.trans.avail | |||
| No | 3 | 22 | 7 |
| Yes | 7 | 45 | 9 |
# Heatmap of the Man.trans.avail x DriveTrain counts; annot=True prints each count in its cell.
sns.heatmap(cr1,annot=True)
<Axes: xlabel='DriveTrain', ylabel='Man.trans.avail'>
# Grid of pairwise plots for the frame's columns (seaborn pairplot).
sns.pairplot(data=df)
<seaborn.axisgrid.PairGrid at 0x25a87274750>
# Same pairwise grid, with the points colored by the `Type` column.
sns.pairplot(data=df,hue='Type')
<seaborn.axisgrid.PairGrid at 0x25a9e431150>